package com.suyunyou.spider.utils;

import java.io.IOException;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;

import com.suyunyou.spider.model.SpiderException;
import com.system.comm.utils.FrameStringUtil;

/**
 * Utility for downloading the content of a web page.
 *
 * @author Yue Jing
 * @date 2016-06-24 15:33:02
 * @version V1.0
 */
public class PageDownUtil {

	/** User-Agent header sent with every request so the request looks like it comes from a desktop browser. */
	private static final String BROWSER_USER_AGENT =
			"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31";

	/**
	 * Downloads a page with an HTTP GET request and returns its HTML.
	 * <p>
	 * The page is fetched and parsed by jsoup; the returned string is the
	 * result of {@code Document.html()} on the parsed document.
	 * <p>
	 * Every failure is reported as a {@link SpiderException}, including a
	 * null, blank or malformed URL, so callers only need to handle one
	 * exception type.
	 *
	 * @param url address of the page to download; must not be null or blank
	 * @return the HTML of the downloaded page, never empty
	 * @throws SpiderException if the URL is null, blank or rejected by jsoup,
	 *         if the request fails with an I/O error, or if no content
	 *         could be extracted from the response
	 */
	public static String get(String url) throws SpiderException {
		// Fail fast with the declared exception type instead of letting jsoup
		// raise an unchecked IllegalArgumentException for an empty URL.
		if (url == null || url.trim().isEmpty()) {
			throw new SpiderException("请求地址不能为空");
		}
		String content;
		try {
			Connection conn = Jsoup.connect(url);
			// Pretend to be a regular browser.
			conn.userAgent(BROWSER_USER_AGENT);
			Document doc = conn.get();
			content = doc.html();
		} catch (IOException e) {
			throw new SpiderException("请求异常: " + e.getMessage(), e);
		} catch (IllegalArgumentException e) {
			// jsoup signals an invalid/malformed URL with an unchecked
			// exception; convert it so it does not bypass callers' handling.
			throw new SpiderException("请求地址无效: " + e.getMessage(), e);
		}
		if(FrameStringUtil.isEmpty(content)) {
			throw new SpiderException("没有提取到信息");
		}
		return content;
	}
	
	/**
	 * Manual smoke test: downloads a sample page and prints its HTML to
	 * standard output. Requires network access.
	 *
	 * @param args unused
	 * @throws SpiderException if the download fails
	 * @throws IOException declared for compatibility; not thrown directly here
	 */
	public static void main(String[] args) throws SpiderException, IOException {
		// Alternative sample URLs:
		//String content = get("http://www.suyunyou.com");
		//String content = get("http://www.linuxidc.com");
		String content = get("http://www.linuxidc.com/../Linux/2016-06/132356.htm");
		System.out.println(content);
	}
}
